This document presents the subset of the figures used for paper about monitoring.

Load packages

library(MASS)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scatterpie)
library(rgdal)
library(multcompView)
library(car)
library(ggpmisc)
library(chisq.posthoc.test)
library(vcd)
library(grid)
library(ggpattern)
library(car)

Sampling data correspond to the data collected with kobo and previously cleaned with the script 1_preprocesamiento_datos_kobo.Rmd.

Load data

# load data
muestreo_tidy<-read.delim("../data/kobo/muestreo_dic2020_tidy.txt", header = TRUE)

#Add column block to muestreo_tidy
muestreo_tidy<-muestreo_tidy%>% mutate(block = case_when(plot %in% c(1,2,3) ~1,
                 plot %in% c(1,2,3) ~1,
                 plot %in% c(4,5,6) ~2,
                 plot %in% c(7,8,9) ~3,
                 plot %in% c(10,11,12) ~4,
                 plot %in% c(13,14,15) ~5,
                 plot %in% c(16,17,18) ~6,
                 plot %in% c(19,20,21) ~7,
                 plot %in% c(22,23,24) ~8,
                 plot %in% c(25,26,27) ~9,
                 plot %in% c(28,29,30) ~10,
                 plot %in% c(31,32,33) ~11,
                 plot %in% c(34,35,36) ~12,
                 plot %in% c(37,38,39) ~13,
                 plot %in% c(40,41,42) ~14,
                 plot %in% c(43,44,45) ~15,
                 plot %in% c(46,47,48) ~16)) %>%
                mutate(block=as.factor(block)) # store as factor, instead of numeric (numbers are IDs)

parcelas_tidy<-read.delim("../data/kobo/parcelas_dic2020_tidy.txt", header = TRUE)

# pivot long plots data to have health data as a single variable
parcelas_long<-pivot_longer(parcelas_tidy, 
                            cols = healthy:worm, 
                            names_to = "tree_health_simplified",
                            values_to = "n_trees")

Data analyzed here correspond only to the trees that were approved during the validation by manually reviewing the photographs in kobotoolbox. Total of 1778 trees sampled, 1765 were approved in the validation.

muestreo_tidy<- filter(muestreo_tidy, X_validation_status=="validation_status_approved")

# write.csv(muestreo_tidy,  file="../../../../Desktop/muestreo_tidy.csv")

Color palettes:

# Make a nice color pallete and legend order for all plots

my_cols=c("darkgreen", 
              "darkred", 
              "orangered1", 
              "cadetblue", 
              "tan", 
              "beige", 
            #  "burlywood4", 
              "coral", 
              "aquamarine3", 
              "gray70", 
              "black")

desired_order=c("healthy", 
                "ozone", 
                "ozone_and_other", 
                "others_combined", 
                "drougth", 
                "fungi", 
             #   "insect", 
                "worm", 
                "acid_rain", 
                "other", 
                "dead")

desired_names=c("healthy", 
                "ozone", 
                "ozone and other", 
                "others combined", 
                "drougth", 
                "fungi", 
             #   "insect", 
                "worm", 
                "acid rain", 
                "other", 
                "dead")

spanish_labels=c("Sano", 
                  "Ozono", 
                  "Ozono y otros", 
                  "Otros combinados no-ozono", 
                  "Sequía", 
                  "Hongos", 
               #   "Insectos", 
                  "Gusano de seda", 
                  "Lluvia acida", 
                  "Otro", 
                  "Muerto")

# For ozone damage percentage 
 my_cols2<-c("darkgreen", "gold2", "chocolate1", "orangered", "red4", "darkorchid4")
 
desired_order_percentage<-c("0%","less than 10%", "10 to 40%", "40 to 50%", "50 to 70%", "more than 70%")

Multiplot fun:

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

Configure google api for maps:

# code adapted from https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-g0ogle-map.html

## configure google api

# You first need to register your api key in https://cloud.google.com/maps-platform/#get-started and follow instructions. The geocoding API is a free service, but you nevertheless need to associate a credit card with the account. Please note that the Google Maps API is not a free service. There is a free allowance of 40,000 calls to the geocoding API per month, and beyond that calls are $0.005 each.
# after you obtain your api, save it in /scripts/api_key.api (not shown in this repo por obvious reasons).

# if you get the following error when running get_map():

#"Error in aperm.default(map, c(2, 1, 3)) : 
#  invalid first argument, must be an array " 

# check this troubleshooting: https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-google-map.html

##  load and register api
api <- readLines("api_key.api")
register_google(key = api)

Map and monitoring figures presented in the paper:

Figure 2

Plot 2a: PNDL location on CDMX map

# get cdmx shape
CDMX<-readOGR(dsn="../data/spatial", layer="CDMX")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "CDMX"
## with 1 features
## It has 8 fields
CDMX<-fortify(CDMX)

# get PNDL shape
PNDL<-readOGR(dsn="../data/spatial", layer="Desierto_Leones_Geo_ITRF08")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum International_Terrestrial_Reference_Frame_2008 in
## Proj4 definition: +proj=longlat +ellps=GRS80 +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "Desierto_Leones_Geo_ITRF08"
## with 1 features
## It has 14 fields
PNDL<-fortify(PNDL)

# get background map
sat_map = get_map(location = c(lon = -99.133549, lat = 19.3), zoom = 10, maptype = 'terrain-background', source = "google")

## plot
p_a<-ggmap(sat_map) + 
            geom_polygon(data = CDMX,
                         aes(x = long, y = lat, group = group),
                         color="black", fill=NA, size=1.5) +
            geom_polygon(data = PNDL,
                         aes(x = long, y = lat, group = group),
                         color="red", fill=NA, size=1.5) +
            geom_point(aes(x=-98.95, y=19.6), 
                       shape=0, stroke=2, size=5, color="black") +
            geom_point(aes(x=-98.95, y=19.55), 
                       shape=0, stroke=2, size=5, color="red") +
            geom_text(aes(label="CDMX", x=-98.87, y=19.6), 
                      color="Black", fontface="bold", size=5) +
            geom_text(aes(label="PNDL", x=-98.87, y=19.55), 
                      color="Black", fontface="bold", size=5) +
            theme(text = element_text(size = 20))+
  ggtitle("a)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

Plot 2b: Satellite image and surroundings of the PNDL

# get background map
sat_map = get_map(location = c(lon = -99.30, lat = 19.31), zoom = 13, maptype = 'satellite', source = "google")

## add towns names
towns<-data.frame(nombre=c("San Bartolo Ameyalco", 
                           "Santa Rosa Xochiac", 
                           "San Mateo Tlaltenango"),
                  long=c(-99.270, -99.29, -99.276),
                  lat=c(19.333, 19.325, 19.346))



## plot
p_b<-ggmap(sat_map) + 
            geom_polygon(data = PNDL,
                         aes(x = long, y = lat, group = group),
                         color="red", fill=NA, size=1.5) +
            geom_point(data=towns, aes(x=long, y=lat), colour="red", size=1.5) +
            geom_text(data=towns, aes(label=nombre, x=long, y=lat), 
                      color="white", fontface="bold",
                      size=5, nudge_y=0.003) +
  # add Cruz de Coloxtitla (CX), and Convento (Cn) landmarks
            geom_text(aes(label="X", x=-99.3014, y=19.286068), 
                      color="white", fontface="bold", size=4) +
            geom_text(aes(label="C", x=-99.31, y=19.3133), 
                      color="white", fontface="bold", size=4) +
            theme(text = element_text(size = 20))+
  ggtitle("b)")

Plot 2c: This is the distribution of the 48 plots:

## plot map
# get map
sat_map = get_map(location = c(lon = -99.3060, lat = 19.2909), zoom = 14, maptype = 'satellite', source = "google")

# plot sampled plots
p_c <-  ggmap(sat_map)
p_c <- p_c + geom_point(data=parcelas_tidy,
                      aes(x=X_coordinates_longitude,
                          y=X_coordinates_latitude),
                      color="red") +
          geom_text(data=parcelas_tidy,
                      aes(x=X_coordinates_longitude,
                          y=X_coordinates_latitude,
                          label=plot),
                      color="white",
                     check_overlap = TRUE,
                      hjust = 0, vjust=1, nudge_x = 0.0005,
                 size= 5) +
    theme(text = element_text(size = 20))+
  ggtitle("c)")
p_c 

Plot 2d: Distribution of tree health status by plot

The following figure shows the total number of trees sampled in each 10x10 m plot, and how many of these are under some category of damage:

parcelas_HOO<-parcelas_long%>%
  filter(tree_health_simplified ==  "ozone" )
  sum(parcelas_HOO$n_trees)
## [1] 125
parcelas_HOO<-parcelas_long%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other" )


p_d <- ggplot(parcelas_HOO, aes(x=plot, y=n_trees,     fill=tree_health_simplified)) +
  geom_bar(stat="identity") +
  scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") 
  

p_d <- p_d + theme_bw() +
  labs(x="Plots", y= "Number of trees") +
  theme(text = element_text(size = 20)) +
  ggtitle("d)")+
  coord_flip()

Multiplot Fig 2

fig_2<-multiplot(p_a, p_c, p_b, p_d, cols=2)

fig_2
## NULL
ggsave("../outputs/Fig_2.png",
       dpi = 600)

Figure 3

####### Codigo Vero Examine percentage of ozone damage by  plot and category reforested/natural exposed/covered
# condition_data<-dplyr::select(muestreo_tidy, contains(c("plot", "block","tree_health_simplified", "reforested")))%>%
#   filter(tree_health_simplified == "healthy"| tree_health_simplified == "ozone" |tree_health_simplified == "ozone_and_other")
# #0 es no y 1 en yes
# condition_data$reforested<-ifelse(condition_data$reforested=="yes",1,0)
# 
# all_reforested<-condition_data%>%
#   group_by(plot)%>%
#   count()
# 
# oo_per<-condition_data%>%
#   dplyr::filter(tree_health_simplified == "ozone_and_other")%>%
#   group_by(plot) %>% 
#   summarise(reforested = sum(as.logical(reforested)))
# 
# oo_per_natreg<-condition_data%>%
#   dplyr::filter(tree_health_simplified == "ozone_and_other")%>%
#   group_by(plot) %>% 
#   count(plot)
# 
# 
# all_reforested  <- all_reforested %>% 
#   left_join(oo_per, by="plot")
# 
# all_reforested  <- all_reforested %>% 
#   left_join(oo_per_natreg, by="plot")
# 
# 
# all_reforested$oo_per_ref<- (oo_per$reforested*100)/all_reforested$n
# all_reforested$oo_per_natreg<- (oo_per_natreg$n*100)/all_reforested$n
# 
# head(join)
# 
# # Repetir para healthy y ozono
# he_per<-condition_data%>%
#   dplyr::filter(tree_health_simplified == "healthy")%>%
#   group_by(plot) %>% 
#   summarise(reforested = sum(as.logical(reforested)))

#### Try a different approach

## subset of data only with reforestaiton, health and exposition
refo_expo<-muestreo_tidy %>% 
              dplyr::select(plot, block, tree_health_simplified, reforested, tree_exposition) %>% 
           # discard other typos of damage
             filter(tree_health_simplified == "healthy" |
                    tree_health_simplified == "ozone" |
                    tree_health_simplified == "ozone_and_other")


### Summarise and estimate n and % (normalized) by category

## By reforestation
summary_by_ref<- refo_expo %>%  
  group_by(plot, block, tree_health_simplified, reforested) %>% 
  # count how many individuals there are in each category
  summarise(n = n()) %>%
  # create percentage within each plot (summing all rows of each plot = 1)
  mutate(perc = n / sum(n)) %>%
  # normalize it
  mutate(perc.log = log(n / sum(n)))

## By exposition
summary_by_exp<- refo_expo %>%  
  group_by(plot, block, tree_health_simplified, tree_exposition) %>% 
  # count how many individuals there are in each category
  summarise(n = n()) %>%
  # create percentage within each plot (summing all rows of each plot = 1)
  mutate(perc = n / sum(n)) %>%
  # normalize it
  mutate(perc.log = log(n / sum(n)))

  
### Perform Anova
library(car)

# for reforestation
Anova(lm(summary_by_ref$perc.log ~ summary_by_ref$tree_health_simplified * summary_by_ref$reforested + summary_by_ref$block))
# for exposition
Anova(lm(summary_by_exp$perc.log ~ summary_by_exp$tree_health_simplified * summary_by_exp$tree_exposition + summary_by_exp$block))

Figure 3a y b Reforested

# Select tree reforested and covered data 
cont_tab<- dplyr::select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "healthy"| tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other")

table(cont_tab)
## , , tree_exposition = cover
## 
##                       reforested
## tree_health_simplified  no yes
##        healthy         247  75
##        ozone            42  20
##        ozone_and_other 278  41
## 
## , , tree_exposition = exposed
## 
##                       reforested
## tree_health_simplified  no yes
##        healthy         136  31
##        ozone            52  11
##        ozone_and_other 157  23
tab_reforested<- as.table(array(c(sum(with(cont_tab, 
                tree_health_simplified == "healthy" & reforested == "yes")),
                sum(with(cont_tab, tree_health_simplified == "ozone" & reforested == "yes")),
                sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & reforested == "yes")),
                sum(with(cont_tab, tree_health_simplified == "healthy" & reforested == "no")),
                sum(with(cont_tab, tree_health_simplified == "ozone" & reforested == "no")),
                sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & reforested == "no"))),
                dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("yes","no"))))

colnames(tab_reforested)<- c("reforested","naturally regeneration")


names(attributes(tab_reforested)$dimnames) <- c("Health status", "Origin")

# Barplot
p_3a<-ggplot(cont_tab, aes(reforested, ..count..) ) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_x_discrete(breaks = c("no", "yes"),labels = c("Natural \n regeneration", "Reforested"))+
  scale_fill_manual(name ="Health status", values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"),labels= c("healthy", "ozone","ozone and other"))+
  theme_bw()+ ggtitle("a)")+ theme(legend.title.align = 0.5)+ 
  theme(text = element_text(size = 20))+ 
  theme(plot.title = element_text(lineheight=1.1))+
  labs(y="Number of trees", x= "Origin")
p_3a
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.

# Mosaic Plot with vcd library
p_3b<-mosaic(tab_reforested, shade=TRUE, legend=TRUE,
            labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Chi2
chisq <- chisq.test(tab_reforested)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  tab_reforested
## X-squared = 17.399, df = 2, p-value = 0.0001666
Contingency <- function(x) {
    chi <- chisq.test(x)
    unname(sqrt(chi$statistic / (chi$statistic + sum(x))))
}

Contingency(tab_reforested)
## [1] 0.1240651
# Observed counts
chisq$observed
##                  Origin
## Health status     reforested naturally regeneration
##   healthy                106                    383
##   ozone                   31                     94
##   ozone and other         64                    435
# Expected counts
round(chisq$expected,2)
##                  Origin
## Health status     reforested naturally regeneration
##   healthy              88.31                 400.69
##   ozone                22.57                 102.43
##   ozone and other      90.12                 408.88
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                  Origin
## Health status     reforested naturally regeneration
##   healthy              1.882                 -0.884
##   ozone                1.773                 -0.833
##   ozone and other     -2.751                  1.292
# Visuaalize Pearson residuals
library(corrplot)
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                  Origin
## Health status     reforested naturally regeneration
##   healthy             20.366                  4.489
##   ozone               18.075                  3.984
##   ozone and other     43.499                  9.587
# Visualize the contribution
corrplot(contrib, is.cor = FALSE)

Figure 3c y d Covered with reforested and natural regeneration

tab_covered<- as.table(array(c(sum(with(cont_tab, tree_health_simplified == "healthy" & tree_exposition == "cover")),
                sum(with(cont_tab, tree_health_simplified == "ozone" & tree_exposition == "cover")),
                sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & tree_exposition == "cover")),
                sum(with(cont_tab, tree_health_simplified == "healthy" & tree_exposition == "exposed")),
                sum(with(cont_tab, tree_health_simplified == "ozone" & tree_exposition == "exposed")),
                sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & tree_exposition == "exposed"))),
             dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("cover", "exposed"))))

colnames(tab_covered)<- c("nursed","exposed")
names(attributes(tab_covered)$dimnames) <- c("Health status", "Tree exposition")

#Barplot

p_3c<-ggplot(cont_tab, aes(tree_exposition, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_x_discrete(breaks = c("cover", "exposed"),labels = c("Nursed", "Exposed"))+
  scale_fill_manual(name ="Health status",values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"), labels= c("healthy", "ozone","ozone and other"))+
  theme_bw()+ 
  ggtitle("c)")+theme(legend.title.align = 0.5)+
  theme(text = element_text(size = 20))+ 
  theme(plot.title = element_text(lineheight=1.1, face="plain"))+
  labs(y="Number of trees", x= "Tree exposition")
p_3c

# Mosaic Plot with vcd library
p_3d <- mosaic(tab_covered, shade=TRUE, legend=TRUE, labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Chi2
chisq <- chisq.test(tab_covered)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  tab_covered
## X-squared = 11.524, df = 2, p-value = 0.003145
# Observed counts
chisq$observed
##                  Tree exposition
## Health status     nursed exposed
##   healthy            322     167
##   ozone               62      63
##   ozone and other    319     180
# Expected counts
round(chisq$expected,2)
##                  Tree exposition
## Health status     nursed exposed
##   healthy         308.87  180.13
##   ozone            78.95   46.05
##   ozone and other 315.18  183.82
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                  Tree exposition
## Health status     nursed exposed
##   healthy          0.747  -0.979
##   ozone           -1.908   2.498
##   ozone and other  0.215  -0.282
# Visuaalize Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                  Tree exposition
## Health status     nursed exposed
##   healthy          4.847   8.311
##   ozone           31.589  54.163
##   ozone and other  0.401   0.688
# Visualize the contribution
library(corrplot)
corrplot(contrib, is.cor = FALSE)

Multiplot

multiplot(p_3a, p_3b, p_3c, p_3d, cols=2)

##                 Origin reforested naturally regeneration
## Health status                                           
## healthy                       106                    383
## ozone                          31                     94
## ozone and other                64                    435
##                 Tree exposition nursed exposed
## Health status                                 
## healthy                            322     167
## ozone                               62      63
## ozone and other                    319     180

Figure 3c y d only reforested trees

# Select tree reforested and covered data 
cont_tab_2<- select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "healthy"| tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other")%>%
  filter(reforested== "yes")
count(cont_tab_2)
count(cont_tab)
table(cont_tab_2)
## , , tree_exposition = cover
## 
##                       reforested
## tree_health_simplified yes
##        healthy          75
##        ozone            20
##        ozone_and_other  41
## 
## , , tree_exposition = exposed
## 
##                       reforested
## tree_health_simplified yes
##        healthy          31
##        ozone            11
##        ozone_and_other  23
tab_covered_2<- as.table(array(c(sum(with(cont_tab_2, tree_health_simplified == "healthy" & tree_exposition == "cover")),
                sum(with(cont_tab_2, tree_health_simplified == "ozone" & tree_exposition == "cover")),
                sum(with(cont_tab_2, tree_health_simplified == "ozone_and_other" & tree_exposition == "cover")),
                sum(with(cont_tab_2, tree_health_simplified == "healthy" & tree_exposition == "exposed")),
                sum(with(cont_tab_2, tree_health_simplified == "ozone" & tree_exposition == "exposed")),
                sum(with(cont_tab_2, tree_health_simplified == "ozone_and_other" & tree_exposition == "exposed"))),
             dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("cover", "exposed"))))

colnames(tab_covered_2)<- c("nursed","exposed")
names(attributes(tab_covered_2)$dimnames) <- c("Health status", "Tree exposition")

#Barplot
Rp_3a<-ggplot(cont_tab_2, aes(tree_exposition, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_x_discrete(breaks = c("cover", "exposed"),
                   labels = c("Nursed", "Exposed"))+
  scale_fill_manual(name ="Health status", 
                    values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"), 
                    labels= c("healthy", "ozone","ozone and other"))+
  theme_bw()+ 
  ggtitle("a)")+theme(legend.title.align = 0.5)+theme(text = element_text(size = 20))+ 
  theme(plot.title = element_text(lineheight=1.1, face="plain"))+
  labs(y="Number of trees", x= "Tree exposition")
  
Rp_3a

# Mosaic Plot with vcd library
Rp_3b <- mosaic(tab_covered_2, shade=TRUE, legend=TRUE, labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Chi2
chisq <- chisq.test(tab_covered_2)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  tab_covered_2
## X-squared = 0.98255, df = 2, p-value = 0.6118
# Observed counts
chisq$observed
##                  Tree exposition
## Health status     nursed exposed
##   healthy             75      31
##   ozone               20      11
##   ozone and other     41      23
# Expected counts
round(chisq$expected,2)
##                  Tree exposition
## Health status     nursed exposed
##   healthy          71.72   34.28
##   ozone            20.98   10.02
##   ozone and other  43.30   20.70
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                  Tree exposition
## Health status     nursed exposed
##   healthy          0.387  -0.560
##   ozone           -0.213   0.308
##   ozone and other -0.350   0.506
# Visuaalize Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                  Tree exposition
## Health status     nursed exposed
##   healthy         15.254  31.915
##   ozone            4.614   9.654
##   ozone and other 12.471  26.093
# Visualize the contribution
library(corrplot)
corrplot(contrib, is.cor = FALSE)

Figure 3c y d NO reforested trees

# Select tree reforested and covered data 
cont_tab_3<- select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "healthy"| tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other")%>%
  filter(reforested== "no")
count(cont_tab_3)
count(cont_tab)
table(cont_tab_3)
## , , tree_exposition = cover
## 
##                       reforested
## tree_health_simplified  no
##        healthy         247
##        ozone            42
##        ozone_and_other 278
## 
## , , tree_exposition = exposed
## 
##                       reforested
## tree_health_simplified  no
##        healthy         136
##        ozone            52
##        ozone_and_other 157
tab_covered_3<- as.table(array(c(sum(with(cont_tab_3, tree_health_simplified == "healthy" & tree_exposition == "cover")),
                sum(with(cont_tab_3, tree_health_simplified == "ozone" & tree_exposition == "cover")),
                sum(with(cont_tab_3, tree_health_simplified == "ozone_and_other" & tree_exposition == "cover")),
                sum(with(cont_tab_3, tree_health_simplified == "healthy" & tree_exposition == "exposed")),
                sum(with(cont_tab_3, tree_health_simplified == "ozone" & tree_exposition == "exposed")),
                sum(with(cont_tab_3, tree_health_simplified == "ozone_and_other" & tree_exposition == "exposed"))),
             dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("cover", "exposed"))))

colnames(tab_covered_3)<- c("nursed","exposed")
names(attributes(tab_covered_3)$dimnames) <- c("Health status", "Tree exposition")

#Barplot
NRp_3c<-ggplot(cont_tab_3, aes(tree_exposition, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_x_discrete(breaks = c("cover", "exposed"),
                   labels = c("Nursed", "Exposed"))+
  scale_fill_manual(name ="Health status", 
                    values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"), 
                    labels= c("healthy", "ozone","ozone and other"))+
  theme_bw()+ 
  ggtitle("c)")+theme(legend.title.align = 0.5)+theme(text = element_text(size = 20))+ 
  theme(plot.title = element_text(lineheight=1.1, face="plain"))+
  labs(y="Number of trees", x= "Tree exposition")
  
NRp_3c

# Mosaic Plot with vcd library
NRp_3d <- mosaic(tab_covered_3, shade=TRUE, legend=TRUE, labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Chi2
chisq <- chisq.test(tab_covered_3)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  tab_covered_3
## X-squared = 13.661, df = 2, p-value = 0.00108
# Observed counts
chisq$observed
##                  Tree exposition
## Health status     nursed exposed
##   healthy            247     136
##   ozone               42      52
##   ozone and other    278     157
# Expected counts
round(chisq$expected,2)
##                  Tree exposition
## Health status     nursed exposed
##   healthy         238.12  144.88
##   ozone            58.44   35.56
##   ozone and other 270.44  164.56
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                  Tree exposition
## Health status     nursed exposed
##   healthy          0.576  -0.738
##   ozone           -2.151   2.757
##   ozone and other  0.459  -0.589
# Visuaalize Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                  Tree exposition
## Health status     nursed exposed
##   healthy          2.427   3.988
##   ozone           33.857  55.643
##   ozone and other  1.545   2.540
# Visualize the contribution
library(corrplot)
corrplot(contrib, is.cor = FALSE)

multiplot(p_3a, NRp_3c, p_3b, NRp_3d, cols=2)

##                 Origin reforested naturally regeneration
## Health status                                           
## healthy                       106                    383
## ozone                          31                     94
## ozone and other                64                    435
##                 Tree exposition nursed exposed
## Health status                                 
## healthy                            247     136
## ozone                               42      52
## ozone and other                    278     157

Tabla contingencia ozone + other y combinados no ozono

# Select tree reforested and covered data 
cont_tab_4<- select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>% filter(tree_health_simplified == "ozone_and_other"| tree_health_simplified == "others_combined" )%>%
  filter(reforested== "yes")                                            

count(cont_tab_4)
table(cont_tab_4)
## , , tree_exposition = cover
## 
##                       reforested
## tree_health_simplified yes
##        others_combined  37
##        ozone_and_other  41
## 
## , , tree_exposition = exposed
## 
##                       reforested
## tree_health_simplified yes
##        others_combined  15
##        ozone_and_other  23
tab_covered_4<- as.table(array(c(sum(with(cont_tab_4, tree_health_simplified == "ozone_and_other" & tree_exposition == "cover")),
                sum(with(cont_tab_4, tree_health_simplified == "others_combined" & tree_exposition == "cover")),
                sum(with(cont_tab_4, tree_health_simplified == "ozone_and_other" & tree_exposition == "exposed")),
                sum(with(cont_tab_4, tree_health_simplified == "others_combined" & tree_exposition == "exposed"))),
             dim=c(2,2), dimnames=list( c("ozone_and_other", "others_combined"), c("cover", "exposed"))))

colnames(tab_covered_4)<- c("nursed","exposed")
names(attributes(tab_covered_3)$dimnames) <- c("Health status", "Tree exposition")

#Barplot
p_3c<-ggplot(cont_tab_4, aes(tree_exposition, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_x_discrete(breaks = c("cover", "exposed"),
                   labels = c("Nursed", "Exposed"))+
  scale_fill_manual(name ="Health status", 
                    values = c("ozone_and_other" = "orangered1", "others_combined" = "cadetblue"), 
                    labels= c("ozone and other", "others combined"))+
  theme_bw()+ 
  ggtitle(" ")+theme(legend.title.align = 0.5)+theme(text = element_text(size = 20))+ 
  theme(plot.title = element_text(lineheight=1.1, face="plain"))+
  labs(y="Number of trees", x= "Tree exposition")
  
p_3c

# Mosaic Plot with vcd library
p_3d <- mosaic(tab_covered_4, shade=TRUE, legend=TRUE, labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Chi2
chisq <- chisq.test(tab_covered_4)
chisq
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tab_covered_4
## X-squared = 0.37259, df = 1, p-value = 0.5416
# Observed counts
chisq$observed
##                 nursed exposed
## ozone_and_other     41      23
## others_combined     37      15
# Expected counts
round(chisq$expected,2)
##                 nursed exposed
## ozone_and_other  43.03   20.97
## others_combined  34.97   17.03
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                 nursed exposed
## ozone_and_other -0.310   0.444
## others_combined  0.344  -0.493
# Visuaalize Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                 nursed exposed
## ozone_and_other 25.814  52.987
## others_combined 31.771  65.214
# Visualize the contribution
library(corrplot)
corrplot(contrib, is.cor = FALSE)

Figure 4

Plot 4a

condition_HOO<-muestreo_tidy%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" |
           tree_health_simplified == "ozone_and_other" )


p <- filter(condition_HOO, tree_heigth<15, tree_nodes>0) %>% 
     ggplot(.) +
     scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") +
theme_bw()

p4_a <- p + geom_histogram(aes(x=tree_nodes, fill=tree_health_simplified))  +
    labs(x="Tree age (years)", y= "Number of trees") +
    theme(text = element_text(size = 20)) +
    theme(plot.title = element_text(lineheight=1.1))+
  ggtitle("a)")
p4_a

Plot 4c

## base data
# Definir plantas sanas y dañadas por otra cosa que no fuera ozono
# cond_PO<- se  refiere a condition Percentage damage by Ozone 
cond_PO<-as_data_frame(muestreo_tidy)
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
# Asignar 0% de daño por ozono a los árboles healthy
cond_PO$ozone_damage_percentage = ifelse(cond_PO$tree_health == "healthy", "0%", cond_PO$ozone_damage_percentage)

# Filtrar por porcentaje de daño
condition_PO<-cond_PO%>%
  filter(ozone_damage_percentage == "0%" | ozone_damage_percentage == "less than 10%" | ozone_damage_percentage == "10 to 40%" | ozone_damage_percentage == "40 to 50%"| ozone_damage_percentage == "50 to 70%" | ozone_damage_percentage == "more than 70%")

condition_PO$ozone_damage_percentage <- as.factor(condition_PO$ozone_damage_percentage)

# Plot
p_od<- condition_PO %>% filter(!is.na(ozone_damage_percentage)) %>%
  ggplot(., aes(x = tree_nodes, fill = ozone_damage_percentage)) +
  geom_bar_pattern(aes(pattern = ozone_damage_percentage),
                   color = "black",
                   pattern_fill = "black",
                   pattern_density = 0.1,
                   pattern_spacing = 0.025,
                   pattern_key_scale_factor = 0.6) +
  scale_fill_manual(values= my_cols2, 
                    breaks = desired_order_percentage,
                    labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
                               "50 to 70%", "more 70%"),
                    name= "Ozone damage\n per tree") +
  scale_pattern_manual(name= "Ozone damage\n per tree",
                       values =c("none","none", "stripe", "none",
                                 "none", "none"),
                       labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
                                  "50 to 70%", "more 70%"))+
  theme_bw()+ theme(text = element_text(size = 20)) 

p_od
## Warning: Removed 147 rows containing non-finite values (`stat_count()`).

p4_c <- p_od +
  labs(x="Tree age (years)", y= "Number of trees") +
  theme(legend.title.align = 0.5)+
  theme(text = element_text(size = 20)) +
  theme(plot.title = element_text(lineheight=1.1))+
  ggtitle("c)")

p4_c
## Warning: Removed 147 rows containing non-finite values (`stat_count()`).

Plot 4b

# Filtrar por categoría de daño
condition_HOO<-muestreo_tidy%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other" )

condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)



# Data distribution

# Los datos tienen a graficar es el número de nodos para cada categoria de salud.
# Los datos son continuos discretos, por lo tanto el analisis a seguir para buscar diferencias entre los grupos son:

shapiro.test(condition_HOO$tree_nodes) #NO HAY NORMALIDAD 
## 
##  Shapiro-Wilk normality test
## 
## data:  condition_HOO$tree_nodes
## W = 0.94294, p-value < 2.2e-16
# Procedo a hacer una prueba no paramétrica (kruskal)

# Debe tener Homocedasticidad: la varianza debe de ser constante entre todos los grupos.
# If the p-value is less than our chosen significance level, we can reject the null hypothesis and conclude that we have enough evidence to state that the variance among the groups is not equal.
leveneTest(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO) #Opera con medias
# As the p value obtained from the Shapiro-Wilk test and Levene’s test is significant (p < 0.05), we conclude that the data is not normally distributed and does not have equal variance. Kruskal-Wallis test is more appropriate for analyzing differences.

kruskal.test(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO) #opera con mediana
## 
##  Kruskal-Wallis rank sum test
## 
## data:  sqrt(tree_nodes) by tree_health_simplified
## Kruskal-Wallis chi-squared = 265.57, df = 2, p-value < 2.2e-16
#As the p value obtained from the Kruskal-Wallis test test is significant , we conclude that there are significant differences.

# For the Kruskal-Wallis test, epsilon-squared is a method of choice for effect size measurement. The epsilon-squared is 0.74 and suggests a very strong effect of plant varieties on yield

# calculate effect size. Hay efecto relativamente fuerte
library(rcompanion)
epsilonSquared(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified)
## epsilon.squared 
##           0.239
# https://peterstatistics.com/CrashCourse/3-TwoVarUnpair/NomOrd/NomOrd3c.html
# To know which plant varieties are significantly different from each other, we will perform the Dunn’s test as post-hoc test for significant Kruskal-Wallis test. As there are multiple comparisons, we will correct the p values using Benjamini-Hochberg FDR method for multiple hypothesis testing at a 5% cut-off. The other tests that can be used for post-hoc test includes Conover and Nemenyi tests. Dunn’s test is more appropriate post-hoc than the Mann-Whitney U test for significant Kruskal-Wallis test as it retains the rank sums of the Kruskal-Wallis.

#poshoc para saber que grupos difieren
pairwise.wilcox.test(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified, p.adjust.method = "holm" )
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  sqrt(condition_HOO$tree_nodes) and condition_HOO$tree_health_simplified 
## 
##                 healthy ozone
## ozone           <2e-16  -    
## ozone_and_other <2e-16  0.012
## 
## P value adjustment method: holm
# Perorm pairwise comparisons
library(ggpubr)
my_comparisons <- list( c("healthy", "ozone"), c("healthy", "ozone_and_other"), c("ozone", "ozone_and_other") )


p4_b<-condition_HOO%>%
   ggplot(aes(y= tree_health_simplified, x= tree_nodes))+
        scale_color_manual(values=  my_cols, labels= desired_names,
                    name= "")+
        geom_point(position="jitter",aes(color = tree_health_simplified), alpha=0.5, size= 2.5)+
  geom_boxplot(color="black", notch = F, alpha = 0.1)+
        xlab("Tree age (years)")+ ylab("Health status")+
  theme_bw()+
  ggtitle("b)")+
  theme(text = element_text(size = 20), axis.text.y=element_blank())+
  theme(plot.title = element_text(lineheight=1.1))+
  scale_x_continuous(breaks=c(5,10,15, 20, 25))+
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")
p4_b
## Warning: Removed 147 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 147 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 147 rows containing missing values (`geom_point()`).

Plot 4d

# Perorm pairwise comparisons
# Run Shapiro-Wilk test
shapiro.test(condition_PO$tree_nodes) #NO HAY NORMALIDAD
## 
##  Shapiro-Wilk normality test
## 
## data:  condition_PO$tree_nodes
## W = 0.94294, p-value < 2.2e-16
# Procedo a hacer un kruskal
# Debe tener homogeneidad. Si es mayor a 0.05 No hay evidencias en contra de la homogeneidad de varianzas. 
leveneTest(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_PO, center = "median")
kruskal.test(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_PO)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  sqrt(tree_nodes) by ozone_damage_percentage
## Kruskal-Wallis chi-squared = 283.95, df = 5, p-value < 2.2e-16
epsilonSquared(x = sqrt(condition_PO$tree_nodes), g = condition_PO$ozone_damage_percentage)
## epsilon.squared 
##           0.255
#poshoc que grupos difieren

#order levels 
condition_PO$ozone_damage_percentage<- ordered(condition_PO$ozone_damage_percentage, levels=c("0%","less than 10%", "10 to 40%", "40 to 50%", "50 to 70%","more than 70%"))

#Prueba
pairwise.wilcox.test(x = sqrt(condition_PO$tree_nodes), g = condition_PO$ozone_damage_percentage, p.adjust.method = "bonferroni" )
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  sqrt(condition_PO$tree_nodes) and condition_PO$ozone_damage_percentage 
## 
##               0%      less than 10% 10 to 40% 40 to 50% 50 to 70%
## less than 10% < 2e-16 -             -         -         -        
## 10 to 40%     < 2e-16 0.6498        -         -         -        
## 40 to 50%     < 2e-16 0.2086        1.0000    -         -        
## 50 to 70%     < 2e-16 2.9e-06       0.0026    0.5273    -        
## more than 70% 1.7e-05 1.0000        1.0000    1.0000    0.2060   
## 
## P value adjustment method: bonferroni
my_comparisons <- list(c("0%", "less than 10%"), c("0%", "10 to 40%"), c("0%", "40 to 50%"),
                        c("0%", "50 to 70%"), c("0%", "more than 70%"),
                        c("less than 10%", "50 to 70%"), 
                       c("10 to 40%", "50 to 70%"))

# Plot
p4_d<-condition_PO%>% filter(!is.na(ozone_damage_percentage)) %>% 
   ggplot(aes(y=ozone_damage_percentage , x= tree_nodes))+
        scale_color_manual(values=  my_cols2,labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
                                         "50 to 70%", "more 70%"))+
        geom_point(position="jitter",aes(color = ozone_damage_percentage), alpha=0.5, size= 2.5)+
        xlab("Tree age (years)")+ ylab("Ozone damage per tree")+
  labs(color = "")+
          geom_boxplot(color="black", notch = F, alpha = 0.1)+
  theme_bw()+
  ggtitle("d)")+
  theme(legend.title.align = 0.5)+
  theme(text = element_text(size = 20),axis.text.y=element_blank())+
  theme(plot.title = element_text(lineheight=1.1))+
  scale_x_continuous(breaks=c(5,10,15, 20, 25))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, label="p.signif")


p4_d
## Warning: Removed 147 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 147 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 147 rows containing missing values (`geom_point()`).

Multiplot Fig 4

multiplot(p4_a, p4_c, p4_b, p4_d, cols=2)
## Warning: Removed 147 rows containing non-finite values (`stat_count()`).
## Warning: Removed 147 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 147 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 147 rows containing missing values (`geom_point()`).
## Warning: Removed 147 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 147 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 147 rows containing missing values (`geom_point()`).

Figure 5

Plot model all data WITHOUT cohort

condition_model_all<-dplyr::select(muestreo_tidy, contains(c("tree_heigth","tree_nodes","tree_health_simplified", "reforested", "tree_exposition", "plot")))%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" |tree_health_simplified == "ozone_and_other" )%>%
  filter(tree_nodes < 15)

# https://rpubs.com/dgolicher/ggplots2
# https://stackoverflow.com/questions/46302410/how-to-specify-link-function-in-ggplot-glm-graph

g0<-ggplot(condition_model_all,aes(x=tree_nodes,y=tree_heigth))

# method = "glm", method.args = list(family = Gamma(link = "log"))
fig5all<-g0+geom_point(aes(colour=tree_health_simplified, 
                           shape=tree_health_simplified), alpha=0.5, size= 2.5)+
  ylim(0,15)+
  geom_smooth(method = "glm", method.args = list(family = Gamma(link = "log")), aes(color= tree_health_simplified), fullrange =T)+
  scale_color_manual(values=  my_cols, labels= desired_names,
                     name= "Health status")+ 
  scale_shape_manual(name= "Health status", labels=desired_names, values = c(15,16,17)) + 
  labs(x="Tree age (years)", y= "Tree height")+
  theme_bw()+ 
  ggtitle("a)")
fig5all

Select cohort data

cohort_data<- dplyr::select(condition_HOO, contains(c("tree_health_simplified", "tree_nodes")))%>% filter(tree_health_simplified == "ozone")
summary(cohort_data)
##      tree_health_simplified   tree_nodes    
##  healthy        :  0        Min.   : 3.000  
##  ozone          :125        1st Qu.: 7.000  
##  ozone_and_other:  0        Median : 8.000  
##                             Mean   : 9.038  
##                             3rd Qu.:11.000  
##                             Max.   :19.000  
##                             NA's   :20
# We choose 7 & 11

Model all data with cohort 7-11

condition_model_all_C<-dplyr::select(muestreo_tidy, contains(c("tree_heigth","tree_nodes","tree_health_simplified", "reforested", "tree_exposition", "plot")))%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" |tree_health_simplified == "ozone_and_other" )%>%
  filter(tree_nodes %in% (7:11))%>%
  filter(tree_nodes < 15)


########################
library(lme4)

########################

# Model all independent variables WITH health status interaction 
glm_1_coh<-glm(tree_heigth ~ tree_nodes, data = condition_model_all_C, family = Gamma(link = "log"))
summary(glm_1_coh)
## 
## Call:
## glm(formula = tree_heigth ~ tree_nodes, family = Gamma(link = "log"), 
##     data = condition_model_all_C)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6684  -0.6477  -0.1315   0.2175   1.9563  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.82092    0.24866  -3.301  0.00105 ** 
## tree_nodes   0.22820    0.02815   8.108 6.36e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.5628858)
## 
##     Null deviance: 235.07  on 401  degrees of freedom
## Residual deviance: 200.90  on 400  degrees of freedom
## AIC: 1642.7
## 
## Number of Fisher Scoring iterations: 5
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_1_coh)

########################
# Model all independent variables WITH health status interaction WITHOUT reforested data
glm_2_coh<-glmer(tree_heigth ~ tree_nodes + tree_health_simplified + tree_exposition + reforested + (1|plot), data = condition_model_all_C, family = Gamma(link = "log"))
summary(glm_2_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes + tree_health_simplified + tree_exposition +  
##     reforested + (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1525.5   1557.5   -754.7   1509.5      394 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5686 -0.7133 -0.0860  0.4988  4.9872 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.09391  0.3064  
##  Residual             0.33133  0.5756  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##                                       Estimate Std. Error t value Pr(>|z|)    
## (Intercept)                           -1.02639    0.24521  -4.186 2.84e-05 ***
## tree_nodes                             0.22537    0.02568   8.776  < 2e-16 ***
## tree_health_simplifiedozone            0.22387    0.09537   2.348   0.0189 *  
## tree_health_simplifiedozone_and_other  0.21017    0.07834   2.683   0.0073 ** 
## tree_expositionexposed                 0.11226    0.06787   1.654   0.0981 .  
## reforestedyes                         -0.32335    0.10358  -3.122   0.0018 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) tr_nds tr_hl_ tr____ tr_xps
## tree_nodes  -0.913                            
## tr_hlth_smp -0.054 -0.071                     
## tr_hlth_s__ -0.195 -0.006  0.468              
## tr_xpstnxps -0.035 -0.063 -0.093  0.071       
## reforestdys -0.184  0.064 -0.073  0.021  0.015
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_2_coh)

########################
# Model all independent variables WITH health status interaction WITHOUT tree exposition data
glm_3_coh<-glmer(tree_heigth ~ tree_nodes + tree_health_simplified + reforested + (1|plot), data = condition_model_all_C, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0024414 (tol = 0.002, component 1)
summary(glm_3_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes + tree_health_simplified + reforested +  
##     (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1526.3   1554.2   -756.1   1512.3      395 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5830 -0.6956 -0.0782  0.5100  4.8350 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.09337  0.3056  
##  Residual             0.32927  0.5738  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##                                       Estimate Std. Error t value Pr(>|z|)    
## (Intercept)                           -1.01127    0.24581  -4.114 3.89e-05 ***
## tree_nodes                             0.22811    0.02572   8.867  < 2e-16 ***
## tree_health_simplifiedozone            0.23865    0.09525   2.505  0.01223 *  
## tree_health_simplifiedozone_and_other  0.20065    0.07833   2.562  0.01042 *  
## reforestedyes                         -0.32619    0.10383  -3.142  0.00168 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) tr_nds tr_hl_ tr____
## tree_nodes  -0.918                     
## tr_hlth_smp -0.055 -0.080              
## tr_hlth_s__ -0.191 -0.002  0.479       
## reforestdys -0.183  0.065 -0.072  0.020
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0024414 (tol = 0.002, component 1)
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_3_coh)

########################
# Model all independent variables WITH health status interaction WITHOUT reforested and tree exposition data
glm_4_coh<-glmer(tree_heigth ~ tree_nodes+tree_health_simplified + tree_exposition + (1|plot) , data = condition_model_all_C, family = Gamma(link = "log"))
summary(glm_4_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes + tree_health_simplified + tree_exposition +  
##     (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1532.8   1560.8   -759.4   1518.8      395 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5286 -0.6923 -0.0918  0.5039  4.9279 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.1090   0.3301  
##  Residual             0.3472   0.5893  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##                                       Estimate Std. Error t value Pr(>|z|)    
## (Intercept)                           -1.16381    0.24347  -4.780 1.75e-06 ***
## tree_nodes                             0.23014    0.02589   8.889  < 2e-16 ***
## tree_health_simplifiedozone            0.20237    0.09565   2.116  0.03436 *  
## tree_health_simplifiedozone_and_other  0.21658    0.07918   2.735  0.00623 ** 
## tree_expositionexposed                 0.11549    0.06838   1.689  0.09123 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) tr_nds tr_hl_ tr____
## tree_nodes  -0.916                     
## tr_hlth_smp -0.076 -0.059              
## tr_hlth_s__ -0.189 -0.009  0.476       
## tr_xpstnxps -0.034 -0.063 -0.092  0.072
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_4_coh)

########################
# Model all independent variables WITH health status interaction WITHOUT reforested and tree exposition data
glm_5_coh<-glmer(tree_heigth ~ tree_nodes + (1|plot), data = condition_model_all_C, family = Gamma(link = "log"))
summary(glm_5_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes + (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1537.7   1553.7   -764.8   1529.7      398 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5213 -0.6705 -0.1426  0.4839  4.9989 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.1129   0.3360  
##  Residual             0.3535   0.5946  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept) -1.03026    0.24273  -4.245 2.19e-05 ***
## tree_nodes   0.23528    0.02619   8.984  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## tree_nodes -0.939
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_5_coh)

########################
glm_6_coh<-glmer(tree_heigth ~ tree_nodes*tree_health_simplified + tree_exposition + reforested + (1|plot), data = condition_model_all_C, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.149233 (tol = 0.002, component 1)
summary(glm_6_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes * tree_health_simplified + tree_exposition +  
##     reforested + (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1529.3   1569.2   -754.6   1509.3      392 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5718 -0.6976 -0.0957  0.5028  4.9427 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.09461  0.3076  
##  Residual             0.33090  0.5752  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##                                                   Estimate Std. Error t value
## (Intercept)                                      -1.134342   0.384094  -2.953
## tree_nodes                                        0.237738   0.042440   5.602
## tree_health_simplifiedozone                       0.272458   0.619129   0.440
## tree_health_simplifiedozone_and_other             0.399009   0.464307   0.859
## tree_expositionexposed                            0.113422   0.067966   1.669
## reforestedyes                                    -0.323423   0.104602  -3.092
## tree_nodes:tree_health_simplifiedozone           -0.005865   0.070827  -0.083
## tree_nodes:tree_health_simplifiedozone_and_other -0.021693   0.052638  -0.412
##                                                  Pr(>|z|)    
## (Intercept)                                       0.00314 ** 
## tree_nodes                                       2.12e-08 ***
## tree_health_simplifiedozone                       0.65989    
## tree_health_simplifiedozone_and_other             0.39014    
## tree_expositionexposed                            0.09515 .  
## reforestedyes                                     0.00199 ** 
## tree_nodes:tree_health_simplifiedozone            0.93400    
## tree_nodes:tree_health_simplifiedozone_and_other  0.68025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) tr_nds tr_hl_ tr____ tr_xps rfrstd tr_:__
## tree_nodes  -0.965                                          
## tr_hlth_smp -0.514  0.516                                   
## tr_hlth_s__ -0.749  0.753  0.433                            
## tr_xpstnxps -0.070  0.011  0.032  0.069                     
## reforestdys -0.175  0.099  0.125  0.045  0.021              
## tr_nds:tr__  0.519 -0.532 -0.988 -0.432 -0.047 -0.138       
## tr_nds:____  0.741 -0.767 -0.432 -0.986 -0.058 -0.043  0.443
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.149233 (tol = 0.002, component 1)
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_6_coh)

########################
glm_7_coh<-glmer(tree_heigth ~ tree_nodes*tree_health_simplified + tree_exposition + reforested + (1|plot), data = condition_model_all_C, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.149233 (tol = 0.002, component 1)
summary(glm_7_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes * tree_health_simplified + tree_exposition +  
##     reforested + (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1529.3   1569.2   -754.6   1509.3      392 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5718 -0.6976 -0.0957  0.5028  4.9427 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.09461  0.3076  
##  Residual             0.33090  0.5752  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##                                                   Estimate Std. Error t value
## (Intercept)                                      -1.134342   0.384094  -2.953
## tree_nodes                                        0.237738   0.042440   5.602
## tree_health_simplifiedozone                       0.272458   0.619129   0.440
## tree_health_simplifiedozone_and_other             0.399009   0.464307   0.859
## tree_expositionexposed                            0.113422   0.067966   1.669
## reforestedyes                                    -0.323423   0.104602  -3.092
## tree_nodes:tree_health_simplifiedozone           -0.005865   0.070827  -0.083
## tree_nodes:tree_health_simplifiedozone_and_other -0.021693   0.052638  -0.412
##                                                  Pr(>|z|)    
## (Intercept)                                       0.00314 ** 
## tree_nodes                                       2.12e-08 ***
## tree_health_simplifiedozone                       0.65989    
## tree_health_simplifiedozone_and_other             0.39014    
## tree_expositionexposed                            0.09515 .  
## reforestedyes                                     0.00199 ** 
## tree_nodes:tree_health_simplifiedozone            0.93400    
## tree_nodes:tree_health_simplifiedozone_and_other  0.68025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) tr_nds tr_hl_ tr____ tr_xps rfrstd tr_:__
## tree_nodes  -0.965                                          
## tr_hlth_smp -0.514  0.516                                   
## tr_hlth_s__ -0.749  0.753  0.433                            
## tr_xpstnxps -0.070  0.011  0.032  0.069                     
## reforestdys -0.175  0.099  0.125  0.045  0.021              
## tr_nds:tr__  0.519 -0.532 -0.988 -0.432 -0.047 -0.138       
## tr_nds:____  0.741 -0.767 -0.432 -0.986 -0.058 -0.043  0.443
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.149233 (tol = 0.002, component 1)
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_7_coh)

########################
# Model all independent variables WITH health status interaction WITHOUT reforested data
glm_8_coh<-glm(tree_heigth ~ tree_nodes + tree_health_simplified + tree_exposition + reforested, data = condition_model_all_C, family = Gamma(link = "log"))
summary(glm_8_coh)
## 
## Call:
## glm(formula = tree_heigth ~ tree_nodes + tree_health_simplified + 
##     tree_exposition + reforested, family = Gamma(link = "log"), 
##     data = condition_model_all_C)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7444  -0.6398  -0.1405   0.3019   1.7701  
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           -0.63394    0.24337  -2.605  0.00954 ** 
## tree_nodes                             0.19493    0.02629   7.416 7.41e-13 ***
## tree_health_simplifiedozone            0.23272    0.10299   2.260  0.02438 *  
## tree_health_simplifiedozone_and_other  0.18895    0.07914   2.388  0.01742 *  
## tree_expositionexposed                 0.13374    0.07557   1.770  0.07751 .  
## reforestedyes                         -0.32034    0.07950  -4.029 6.71e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.4626594)
## 
##     Null deviance: 235.07  on 401  degrees of freedom
## Residual deviance: 185.21  on 396  degrees of freedom
## AIC: 1615.5
## 
## Number of Fisher Scoring iterations: 6
#### PLOT MODEL
par(mfrow =c(2,2))
plot(glm_8_coh)

# Choose better model
AIC(glm_1_coh,glm_2_coh,glm_3_coh, glm_4_coh, glm_5_coh, glm_6_coh, glm_7_coh, glm_8_coh)
######
library(forestmodel)
coef(glm_2_coh)
## $plot
##    (Intercept) tree_nodes tree_health_simplifiedozone
## 1   -0.5708594  0.2253729                   0.2238723
## 2   -0.7768326  0.2253729                   0.2238723
## 3   -0.5453314  0.2253729                   0.2238723
## 4   -1.1051397  0.2253729                   0.2238723
## 5   -0.9373549  0.2253729                   0.2238723
## 6   -1.1196400  0.2253729                   0.2238723
## 7   -0.6304442  0.2253729                   0.2238723
## 8   -0.8355182  0.2253729                   0.2238723
## 9   -1.3027564  0.2253729                   0.2238723
## 10  -0.4089150  0.2253729                   0.2238723
## 11  -0.8816738  0.2253729                   0.2238723
## 12  -1.0791983  0.2253729                   0.2238723
## 13  -0.8250442  0.2253729                   0.2238723
## 14  -1.0973120  0.2253729                   0.2238723
## 15  -0.8591396  0.2253729                   0.2238723
## 16  -0.9613413  0.2253729                   0.2238723
## 17  -0.8356753  0.2253729                   0.2238723
## 18  -0.8813159  0.2253729                   0.2238723
## 19  -0.6835645  0.2253729                   0.2238723
## 20  -0.5519089  0.2253729                   0.2238723
## 21  -1.0614391  0.2253729                   0.2238723
## 25  -1.2624673  0.2253729                   0.2238723
## 26  -1.1261501  0.2253729                   0.2238723
## 27  -0.7821193  0.2253729                   0.2238723
## 28  -1.3819118  0.2253729                   0.2238723
## 29  -1.5268494  0.2253729                   0.2238723
## 30  -1.1148071  0.2253729                   0.2238723
## 31  -1.1068510  0.2253729                   0.2238723
## 32  -1.2188473  0.2253729                   0.2238723
## 33  -0.5588080  0.2253729                   0.2238723
## 34  -1.0656423  0.2253729                   0.2238723
## 35  -0.9269286  0.2253729                   0.2238723
## 36  -1.7117587  0.2253729                   0.2238723
## 37  -1.2030318  0.2253729                   0.2238723
## 38  -1.1473383  0.2253729                   0.2238723
## 39  -1.1787574  0.2253729                   0.2238723
## 40  -0.8556192  0.2253729                   0.2238723
## 41  -1.2688315  0.2253729                   0.2238723
## 42  -1.1939389  0.2253729                   0.2238723
## 43  -1.2576406  0.2253729                   0.2238723
## 44  -1.2127023  0.2253729                   0.2238723
## 45  -1.1785724  0.2253729                   0.2238723
## 46  -0.7532611  0.2253729                   0.2238723
## 47  -1.0873556  0.2253729                   0.2238723
## 48  -1.0236347  0.2253729                   0.2238723
##    tree_health_simplifiedozone_and_other tree_expositionexposed reforestedyes
## 1                              0.2101677              0.1122581     -0.323353
## 2                              0.2101677              0.1122581     -0.323353
## 3                              0.2101677              0.1122581     -0.323353
## 4                              0.2101677              0.1122581     -0.323353
## 5                              0.2101677              0.1122581     -0.323353
## 6                              0.2101677              0.1122581     -0.323353
## 7                              0.2101677              0.1122581     -0.323353
## 8                              0.2101677              0.1122581     -0.323353
## 9                              0.2101677              0.1122581     -0.323353
## 10                             0.2101677              0.1122581     -0.323353
## 11                             0.2101677              0.1122581     -0.323353
## 12                             0.2101677              0.1122581     -0.323353
## 13                             0.2101677              0.1122581     -0.323353
## 14                             0.2101677              0.1122581     -0.323353
## 15                             0.2101677              0.1122581     -0.323353
## 16                             0.2101677              0.1122581     -0.323353
## 17                             0.2101677              0.1122581     -0.323353
## 18                             0.2101677              0.1122581     -0.323353
## 19                             0.2101677              0.1122581     -0.323353
## 20                             0.2101677              0.1122581     -0.323353
## 21                             0.2101677              0.1122581     -0.323353
## 25                             0.2101677              0.1122581     -0.323353
## 26                             0.2101677              0.1122581     -0.323353
## 27                             0.2101677              0.1122581     -0.323353
## 28                             0.2101677              0.1122581     -0.323353
## 29                             0.2101677              0.1122581     -0.323353
## 30                             0.2101677              0.1122581     -0.323353
## 31                             0.2101677              0.1122581     -0.323353
## 32                             0.2101677              0.1122581     -0.323353
## 33                             0.2101677              0.1122581     -0.323353
## 34                             0.2101677              0.1122581     -0.323353
## 35                             0.2101677              0.1122581     -0.323353
## 36                             0.2101677              0.1122581     -0.323353
## 37                             0.2101677              0.1122581     -0.323353
## 38                             0.2101677              0.1122581     -0.323353
## 39                             0.2101677              0.1122581     -0.323353
## 40                             0.2101677              0.1122581     -0.323353
## 41                             0.2101677              0.1122581     -0.323353
## 42                             0.2101677              0.1122581     -0.323353
## 43                             0.2101677              0.1122581     -0.323353
## 44                             0.2101677              0.1122581     -0.323353
## 45                             0.2101677              0.1122581     -0.323353
## 46                             0.2101677              0.1122581     -0.323353
## 47                             0.2101677              0.1122581     -0.323353
## 48                             0.2101677              0.1122581     -0.323353
## 
## attr(,"class")
## [1] "coef.mer"
summary(glm_2_coh)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: tree_heigth ~ tree_nodes + tree_health_simplified + tree_exposition +  
##     reforested + (1 | plot)
##    Data: condition_model_all_C
## 
##      AIC      BIC   logLik deviance df.resid 
##   1525.5   1557.5   -754.7   1509.5      394 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5686 -0.7133 -0.0860  0.4988  4.9872 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  plot     (Intercept) 0.09391  0.3064  
##  Residual             0.33133  0.5756  
## Number of obs: 402, groups:  plot, 45
## 
## Fixed effects:
##                                       Estimate Std. Error t value Pr(>|z|)    
## (Intercept)                           -1.02639    0.24521  -4.186 2.84e-05 ***
## tree_nodes                             0.22537    0.02568   8.776  < 2e-16 ***
## tree_health_simplifiedozone            0.22387    0.09537   2.348   0.0189 *  
## tree_health_simplifiedozone_and_other  0.21017    0.07834   2.683   0.0073 ** 
## tree_expositionexposed                 0.11226    0.06787   1.654   0.0981 .  
## reforestedyes                         -0.32335    0.10358  -3.122   0.0018 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) tr_nds tr_hl_ tr____ tr_xps
## tree_nodes  -0.913                            
## tr_hlth_smp -0.054 -0.071                     
## tr_hlth_s__ -0.195 -0.006  0.468              
## tr_xpstnxps -0.035 -0.063 -0.093  0.071       
## reforestdys -0.184  0.064 -0.073  0.021  0.015

Fig 5a: Plot model aLL data with cohort

#
# https://rpubs.com/dgolicher/ggplots2
# https://stackoverflow.com/questions/46302410/how-to-specify-link-function-in-ggplot-glm-graph

g0<-ggplot(condition_model_all_C,aes(x=tree_nodes,y=tree_heigth))

# method = "glm", method.args = list(family = Gamma(link = "log"))
fig5a<-g0+geom_point(aes(colour=tree_health_simplified,
                         shape=tree_health_simplified), alpha=0.5, size= 2.5)+
  ylim(0,15)+
  geom_smooth(method = "glm", method.args = list(family = Gamma(link = "inverse")), aes(color= tree_health_simplified, fullrange =T))+
  scale_color_manual(values=  my_cols, labels= desired_names,
                     name= "Health status")+ 
  scale_shape_manual(name= "Health status", labels=desired_names, values = c(15,16,17)) + 
  labs(x="Tree age (years)", y= "Tree height")+
  theme_bw()+ 
  theme(legend.key.size = unit(0.7, 'cm'),
        legend.key.width= unit(0.7, 'cm'),
        legend.title = element_text(size=10),
        legend.text = element_text(size=10))+
  theme(text = element_text(size = 20))+
  theme(legend.title.align = 0.5)+
  ggtitle("a)")
## Warning in geom_smooth(method = "glm", method.args = list(family = Gamma(link =
## "inverse")), : Ignoring unknown aesthetics: fullrange
fig5a

Fig 5b: Plot model ORIGIN data with cohort

#
# https://rpubs.com/dgolicher/ggplots2
# https://stackoverflow.com/questions/46302410/how-to-specify-link-function-in-ggplot-glm-graph

fig5b<-g0+geom_point(aes(colour=tree_health_simplified,
                         shape=tree_health_simplified), alpha=0.5, size= 2.5)+
  ylim(0,15)+
  geom_smooth(method = "glm", method.args = list(family = Gamma(link = "log")), aes(color= tree_health_simplified, linetype = reforested), fullrange =T)+
  scale_color_manual(values=  my_cols, labels= desired_names,
                     name= "Health status")+ 
  scale_shape_manual(name= "Health status", labels=desired_names, values = c(15,16,17)) + 
  scale_linetype_manual(values= c("solid", "dashed"),labels = c("Natural \n regeneration", "Reforested"), name= "Origin")+
  labs(x="Tree age (years)", y= "Tree height")+
  theme_bw()+ 
  theme(legend.key.size = unit(0.7, 'cm'),
        legend.key.width= unit(0.7, 'cm'),
        legend.title = element_text(size=10),
        legend.text = element_text(size=10))+
  theme(text = element_text(size = 20))+
  theme(legend.title.align = 0.5)+
  ggtitle("b)")
fig5b

Fig 5c: model EXPOSITION data with cohort

#
# https://rpubs.com/dgolicher/ggplots2
# https://stackoverflow.com/questions/46302410/how-to-specify-link-function-in-ggplot-glm-graph

g0<-ggplot(condition_model_all_C,aes(x=tree_nodes,y=tree_heigth))

# method = "glm", method.args = list(family = Gamma(link = "log"))
fig5c<-g0+geom_point(aes(colour=tree_health_simplified,
                         shape=tree_health_simplified), alpha=0.5, size= 2.5)+
    ylim(0,15)+
  geom_smooth(method = "glm", method.args = list(family = Gamma(link = "log")), aes(color= tree_health_simplified,linetype = tree_exposition), fullrange =T)+
  scale_color_manual(values=  my_cols, labels= desired_names,
                    name= "Health status")+
  scale_shape_manual(name= "Health status", labels=desired_names, values = c(15,16,17)) + 
  scale_linetype_manual(values= c("solid", "dashed"),labels = c("Nursed", "Exposed"), name= "Tree exposition",) +
  labs(x="Tree age (years)", y= "Tree height")+
  #stat_smooth(method = "glm", method.args = list(family = Gamma(link = "log")), se = TRUE)+
  theme_bw()+ 
  theme(legend.key.size = unit(0.7, 'cm'),
        legend.key.width= unit(0.7, 'cm'),
        legend.title = element_text(size=10),
        legend.text = element_text(size=10))+
  theme(text = element_text(size = 20))+
  theme(legend.title.align = 0.5)+
  ggtitle("c)")
fig5c

Multiplot Fig 5

multiplot(fig5a, fig5b, fig5c, cols=1)

Figure S8

p_d <- ggplot(parcelas_long, aes(x=plot, y=n_trees, fill=tree_health_simplified)) +
  geom_bar_pattern(stat="identity",
                   aes(pattern = tree_health_simplified),
                   color = "black",
                   pattern_fill = "black",
                   pattern_density = 0.1,
                   pattern_spacing = 0.015,
                   pattern_key_scale_factor = 0.6) +
  scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") + 
  scale_pattern_manual(name= "Health status", breaks = desired_order,
                       values=c ("none",  "none", "none", "none", 
                                 "crosshatch", "none", "stripe", "circle", "none", "none"),
                       labels = desired_names)
  

p_d <- p_d + theme_bw() +
  labs(x="Plots", y= "Number of trees") +
  theme(text = element_text(size = 20)) +
  ggtitle("")+
  coord_flip()
p_d 

Figure S11

# plot pies in map
p_satmap <-  ggmap(sat_map)
p_satmap +geom_scatterpie(data=parcelas_tidy,
                aes(x=X_coordinates_longitude,
                    y=X_coordinates_latitude,
                    group=plot),
                pie_scale = 1.5,
                cols=desired_order,
                color=NA,
                alpha=1)  +
  scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") +
  theme(text = element_text(size = 20))

Figure S12

# Create new variable with percentage of ozone damage
parcelas_tidy<-parcelas_tidy %>% rowwise() %>% 
                     mutate(., 
                      total=sum(healthy,ozone,ozone_and_other,
                          drougth, acid_rain, other,
                          others_combined, dead, fungi,
                          # insect, 
                          worm)) %>%
                    mutate(perc.ozone= sum(ozone, ozone_and_other)/total)

#Statistical
leveneTest( parcelas_tidy$X_coordinates_altitude, parcelas_tidy$perc.ozone)
## Warning in leveneTest.default(parcelas_tidy$X_coordinates_altitude,
## parcelas_tidy$perc.ozone): parcelas_tidy$perc.ozone coerced to factor.
## Warning in anova.lm(lm(resp ~ group)): ANOVA F-tests on an essentially perfect
## fit are unreliable
#plot
p <- ggplot(parcelas_tidy) +
     geom_point(aes(x=X_coordinates_altitude,
             y=perc.ozone))


p<- ggplot(parcelas_tidy, aes(X_coordinates_altitude, perc.ozone ))+
  geom_point(color= "grey50", size = 3, alpha = 0.6)

p<-p + 
  stat_smooth(color = "skyblue", formula = y ~ x,fill = "skyblue", method = "lm") +
  stat_poly_eq(
    aes(label = paste(..eq.label.., ..adj.rr.label.., sep = '~~~~')),
    formula = y ~ x,  parse = TRUE,
      size = 5, # Tamaño de fuente de la fórmula
             label.x = 0.1, #location, la proporción entre 0-1
      label.y = 0.95)+
  labs(x="Plot altitude", y= "Percentage of ozone damaged trees within a plot")+
  theme_bw() +
   theme(plot.title = element_text(lineheight=1.1, face="bold")) +
  theme(text = element_text(size = 20), 
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text=element_text(size=9))

p

ggsave("../outputs/FigS3.png",
       dpi = 600)

RESULTS Mean and sd of trees collected by each ranger during Participatory monitoring

# No se incluye a M (37) y PP (6), ya que eran colectados por diferentes miembros del team
trees<- c(135,133,130,174,150,139,165,166,133,137,125,105)
rangers<- c("A","B","C","D","E","F","G","H", "I", "J", "K", "L")
mean(trees)
## [1] 141
sd(trees)
## [1] 19.60519

Percentage damage in plots

#Percentage damage plots

PDP<-data.frame(parcelas_tidy$plot, parcelas_tidy$perc.ozone)

PDP<-PDP[order(PDP$parcelas_tidy.plot),]
range(PDP$parcelas_tidy.perc.ozone)
## [1] 0.00000 0.84375
head(PDP[order(PDP$parcelas_tidy.perc.ozone),])

Percentage per each health status

# Create new variable with porcentage of ozone damage

status_HS<- c("healthy","ozone","ozone_and_other",
                          "drougth", "acid_rain", "other",
                          "others_combined", "dead", "fungi",
                          "worm")
count_HS<-c(sum(parcelas_tidy$healthy), sum(parcelas_tidy$ozone),
            sum(parcelas_tidy$ozone_and_other),sum(parcelas_tidy$drougth),
            sum(parcelas_tidy$acid_rain),sum(parcelas_tidy$other),
            sum(parcelas_tidy$others_combined), sum(parcelas_tidy$dead),
            sum(parcelas_tidy$fungi), sum(parcelas_tidy$worm))
sum(count_HS)
## [1] 1765
# Tree number to each health status
percentageH<-data.frame(status_HS, count_HS)
percentageH
# Percentage of ozone and ozone + other trees
(sum(c(parcelas_tidy$ozone, parcelas_tidy$ozone_and_other))*100)/sum(count_HS)
## [1] 35.35411
# Percentage of healthy trees
(sum(parcelas_tidy$healthy)*100)/sum(count_HS)
## [1] 27.70538
# Percentage of others damages in trees with dead trees
(sum(c(parcelas_tidy$drougth, parcelas_tidy$acid_rain , parcelas_tidy$other, parcelas_tidy$others_combined,parcelas_tidy$dead, parcelas_tidy$fungi, parcelas_tidy$worm))*100)/sum(count_HS)
## [1] 36.94051
# Filtrar por categoría de daño
condition_HOz<-muestreo_tidy%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone")

condition_HOz$tree_health_simplified <- as.factor(condition_HOz$tree_health_simplified)


# Damages by other NO ozone NO DEAD
levels(as.factor(muestreo_tidy$tree_health_simplified))
##  [1] "acid_rain"       "dead"            "drougth"         "fungi"          
##  [5] "healthy"         "other"           "others_combined" "ozone"          
##  [9] "ozone_and_other" "worm"
no_ozone<- dplyr::select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "acid_rain"| tree_health_simplified == "fungi" | tree_health_simplified == "others_combined" | tree_health_simplified == "worm" | tree_health_simplified == "drougth"| tree_health_simplified == "other")

(609*100)/1765
## [1] 34.50425
# Percentage of DEAD trees
levels(as.factor(muestreo_tidy$tree_health_simplified))
##  [1] "acid_rain"       "dead"            "drougth"         "fungi"          
##  [5] "healthy"         "other"           "others_combined" "ozone"          
##  [9] "ozone_and_other" "worm"
dead<- dplyr::select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "dead")
count(dead)
(43*100)/1765
## [1] 2.436261
sum(489,125, 499,1,9,70, 3,271,255, 43)
## [1] 1765

Chi test

count(dplyr::select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "ozone"))
count(dplyr::select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "ozone_and_other"))
OBS_ozo<-c(125,499)
ESP_ozo<-c(0.5, 0.5)
chisq.test(x = OBS_ozo,
           p = ESP_ozo)
## 
##  Chi-squared test for given probabilities
## 
## data:  OBS_ozo
## X-squared = 224.16, df = 1, p-value < 2.2e-16
#224.1603
#p-value < 2.2e-16
# p<0.001